#################################################################################
##########################      Lab6 Prophet      ###############################
##############    -------------- AirQuality ------------- #######################
#################################################################################

# Load necessary libraries
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2   3.5.1      ✔ fma       2.5   
## ✔ forecast  8.22.0     ✔ expsmooth 2.3
## 
library(prophet) # https://facebook.github.io/prophet/docs/quick_start.html
## Loading required package: Rcpp
## Loading required package: rlang
library(MLTools)
##    __    __   _            _________
##   |  \  /  | | |          |___   ___| _____   _____   _      _____
##   |   \/   | | |     ____     | |    |  _  | |  _  | | |    |  ___|
##   | |\  /| | | |    |____|    | |    | | | | | | | | | |    |___  |
##   | | \/ | | | |____          | |    | |_| | | |_| | | |__   ___| |
##   |_|    |_| |______|         |_|    |_____| |_____| |____| |_____|
## 
##            Learning is fun, Machine Learning is funnier
##             -----------------------------------------
##            With great power comes great responsibility
##             -----------------------------------------
## Created by: Jos<e9> Portela Gonz<e1>lez <Jose.Portela@iit.comillas.edu>
## Guillermo Mestre Marcos <Guillermo.Mestre@comillas.edu> Jaime Pizarroso Gonzalo
## <jpizarroso@comillas.edu> Antonio Mu<f1>oz San Roque
## <antonio.munoz@iit.comillas.edu>
## 
## Escuela T<e9>cnica Superior de Ingenier<ed>a ICAI
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)     # For prophet_df manipulation
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Set working directory -------------------------------------------------------
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# 0. Load data -----------------------------------------------------------------
fdata <- read.table("prophet_data.csv", sep = ";", header = TRUE)
head(fdata)
##         date     output     input1   input2
## 1 2015-01-01  0.3947508 0.01325662 7.713629
## 2 2015-01-02  0.7071801 0.01314552 7.308481
## 3 2015-01-03 -0.7920484 0.01264853 7.474993
## 4 2015-01-04 -0.3877153 0.01215471 6.913461
## 5 2015-01-05 -2.4661411 0.01166405 7.110118
## 6 2015-01-06 -2.3676671 0.01117658 6.310578
fdata$date <- as.Date(fdata$date)

# Order and Check for missing dates
fdata <- fdata |> arrange(date)
date_range <- seq(min(fdata$date), max(fdata$date), by = "day")
date_range[!date_range %in% fdata$date] 
## Date of length 0
sum(is.na(fdata$output))
## [1] 0
# Check for duplicates
length(fdata$date) 
## [1] 2922
length(unique(fdata$date))
## [1] 2922
ggtsdisplay(fdata$output, lag.max=100)

# Leave a gap of 182 days for testing
train_data <- fdata[1:(nrow(fdata) - 182), ]
test_data <- fdata[(nrow(fdata) - 182 + 1):nrow(fdata), ]

prophet_df <- data.frame(
    ds = train_data$date, # Dates must be named ds
    # y = train_data$output # Output must be named y
    y = train_data$output, # Output must be named y
    x1 = train_data$input1, # Input can be named whatever, x1 for convenience
    x2 = train_data$input2 # Input can be named whatever, x2 for convenience
)


# 1. Model without seasonality -------------------------------------------------
model1 <- prophet(
    yearly.seasonality = FALSE,
    weekly.seasonality = FALSE,
    daily.seasonality = FALSE
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model1, prophet_df)
p <- plot(model1, predictions) + add_changepoints_to_plot(model1)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model1, predictions)

# Check residuals
residuals_model_1 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_1, 36)

# 2. Model with custom changepoints --------------------------------------------
changepoints <- as.Date(c("2017-01-01", "2019-06-01", "2022-01-01"))
model2 <- prophet(
    yearly.seasonality = FALSE,
    weekly.seasonality = FALSE,
    daily.seasonality = FALSE,
    changepoints = changepoints
)  %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model2, prophet_df)
p <- plot(model2, predictions) + add_changepoints_to_plot(model2)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model2, predictions)

# Check residuals
residuals_model_2 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_2, 36)

# 3. Model with seasonality (yearly and weekly) --------------------------------
model3 <- prophet(
# model3 <- prophet(mcmc.samples = 4000,
    yearly.seasonality = TRUE,
    weekly.seasonality = TRUE,
    daily.seasonality = FALSE,
    changepoints = changepoints
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model3, prophet_df)
p <- plot(model3, predictions) + add_changepoints_to_plot(model3)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model3, predictions)

# Check residuals
residuals_model_3 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_3, 36)

# 4. Model with custom seasonality (monthly) -----------------------------------
model4 <- prophet(
    yearly.seasonality = TRUE,
    weekly.seasonality = TRUE,
    daily.seasonality = FALSE,
    changepoints = changepoints
) %>% add_seasonality(
    name = 'monthly',
    period = 30.5,
    fourier.order = 3
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model4, prophet_df)
p <- plot(model4, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model4, predictions)

# Check residuals
residuals_model_4 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_4, 36)

# 5. Model with external regressors --------------------------------------------
ggtsdisplay(fdata$input1, lag.max=36)

ggtsdisplay(fdata$input2, lag.max=36)

# Add regressors to the prophet_df frame
model5 <- prophet(
    yearly.seasonality = TRUE,
    weekly.seasonality = TRUE,
    daily.seasonality = FALSE,
    changepoints = changepoints
) %>% add_seasonality(
    name = 'monthly',
    period = 30.5,
    fourier.order = 5
) %>% add_regressor(
    'x1'
) %>% add_regressor(
    'x2'
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model5, prophet_df) 
p <- plot(model5, predictions) 
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model5, predictions)

# Check residuals
residuals_model_5 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_5, 36)

outliers_model_5_indx <- which(abs(residuals_model_5) > (mean(residuals_model_5) + 3 * sd(residuals_model_5)))
print(fdata$date[outliers_model_5_indx])
##  [1] "2015-12-27" "2015-12-29" "2017-06-09" "2018-03-25" "2018-12-25"
##  [6] "2018-12-27" "2018-12-28" "2019-02-23" "2019-06-17" "2019-12-25"
## [11] "2019-12-29" "2020-12-21" "2020-12-28"
# 6. Model with holidays -------------------------------------------------------
# Create synthetic holidays
holidays <- data.frame(
    holiday = 'christmas',
    ds = seq.Date(from = as.Date("2015-12-24"), to = as.Date("2022-12-24"), by = "year"),
    lower_window = 0,  # Holidays affect lower_window previous days
    upper_window = 5   # Holidays affect upper_window next days
)

model6 <- prophet(
    yearly.seasonality = TRUE,
    weekly.seasonality = TRUE,
    daily.seasonality = FALSE,
    changepoints = changepoints,
    holidays = holidays
) %>% add_seasonality(
    name = 'monthly',
    period = 30.5,
    fourier.order = 5
)  %>% add_regressor(
    'x1'
) %>% add_regressor(
    'x2'
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model6, prophet_df)
p <- plot(model6, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model6, predictions)

# Check residuals
residuals_model_6 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_6, 36)

# 7. Model with adjusted hyperparameters ---------------------------------------
# Manually set hyperparameters 
model7 <- prophet(
    yearly.seasonality = 3,
    weekly.seasonality = 3,
    daily.seasonality = FALSE,
    changepoints = changepoints,
    holidays = holidays
) %>% add_seasonality(
    name = 'monthly',
    period = 30.5,
    fourier.order = 5
) %>% add_regressor(
    'x1'
) %>% add_regressor(
    'x2'
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model7, prophet_df)
p <- plot(model7, predictions) + add_changepoints_to_plot(model7)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model7, predictions) 

# Check residuals
residuals_model_7 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_7, 36)

# How can we optimize the hyperparameter values?


# 8. Model with cross-validation and hyperparameter tuning ---------------------
# Define hyperparameter grid 
param_grid <- expand.grid(
    yearly_fourier_order = c(1, 3, 5),
    weekly_fourier_order = c(1, 3, 5),
    monthly_fourier_order = c(1, 3, 5)
)

# Initialize a data frame to store results
cv_results <- data.frame()

# Loop over each combination of hyperparameters
for (i in 1:nrow(param_grid)) {
    params <- param_grid[i, ]
    temp_model <- prophet(
        yearly.seasonality = params$yearly_fourier_order,
        weekly.seasonality = params$weekly_fourier_order,
        daily.seasonality = FALSE,
        changepoints = changepoints,
        holidays = holidays
    ) %>% add_seasonality(
        name = 'monthly',
        period = 30.5,
        fourier.order = params$monthly_fourier_order
    ) %>% add_regressor(
        'x1'
    ) %>% add_regressor(
        'x2'
    ) %>% fit.prophet(prophet_df)
    
    # Perform cross-validation
    cv <- cross_validation(
        temp_model,
        initial = 365 * 2,     # First 2 years for training
        period = 180,          # Retrain every 6 months
        horizon = 365,         # Forecast horizon of 1 year
        units = 'days'
    )
    
    # Compute performance metrics
    perf <- performance_metrics(cv)
    
    # Store the results
    cv_results <- rbind(
        cv_results,
        data.frame(
            yearly_fourier_order = params$yearly_fourier_order,
            weekly_fourier_order = params$weekly_fourier_order,
            monthly_fourier_order = params$monthly_fourier_order,
            MAPE = perf$mape[1],
            RMSE = perf$rmse[1]
        )
    )
}
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
print(cv_results)
##    yearly_fourier_order weekly_fourier_order monthly_fourier_order      MAPE
## 1                     1                    1                     1  9.810883
## 2                     3                    1                     1  7.613038
## 3                     5                    1                     1  8.015353
## 4                     1                    3                     1  9.810883
## 5                     3                    3                     1  7.613038
## 6                     5                    3                     1  8.015353
## 7                     1                    5                     1  9.818374
## 8                     3                    5                     1  7.610579
## 9                     5                    5                     1  8.042792
## 10                    1                    1                     3 10.151347
## 11                    3                    1                     3  7.790303
## 12                    5                    1                     3  8.250467
## 13                    1                    3                     3 10.151347
## 14                    3                    3                     3  7.790303
## 15                    5                    3                     3  8.250467
## 16                    1                    5                     3 10.144985
## 17                    3                    5                     3  7.803658
## 18                    5                    5                     3  8.229918
## 19                    1                    1                     5 10.415708
## 20                    3                    1                     5  8.077234
## 21                    5                    1                     5  8.513002
## 22                    1                    3                     5 10.415708
## 23                    3                    3                     5  8.077234
## 24                    5                    3                     5  8.513002
## 25                    1                    5                     5 10.400745
## 26                    3                    5                     5  8.080000
## 27                    5                    5                     5  8.510549
##        RMSE
## 1  1.661312
## 2  1.636427
## 3  1.649546
## 4  1.661312
## 5  1.636427
## 6  1.649546
## 7  1.663086
## 8  1.636987
## 9  1.649173
## 10 1.674417
## 11 1.647289
## 12 1.661413
## 13 1.674417
## 14 1.647289
## 15 1.661413
## 16 1.673142
## 17 1.649093
## 18 1.659843
## 19 1.678496
## 20 1.651586
## 21 1.665529
## 22 1.678496
## 23 1.651586
## 24 1.665529
## 25 1.679063
## 26 1.652705
## 27 1.665236
# Find the best hyperparameters based on RMSE
best_params <- cv_results %>% filter(MAPE == min(MAPE))
print("Best Hyperparameters:")
## [1] "Best Hyperparameters:"
print(best_params)
##   yearly_fourier_order weekly_fourier_order monthly_fourier_order     MAPE
## 1                    3                    5                     1 7.610579
##       RMSE
## 1 1.636987
# Final model with best hyperparameters
model8 <- prophet(
    yearly.seasonality = best_params$yearly_fourier_order[1],
    weekly.seasonality = best_params$weekly_fourier_order[1],
    daily.seasonality = FALSE,
    changepoints = changepoints,
    holidays = holidays
) %>% add_seasonality(
    name = 'monthly',
    period = 30.5,
    fourier.order = best_params$monthly_fourier_order[1]
) %>% add_regressor(
    'x1'
) %>% add_regressor(
    'x2'
) %>% fit.prophet(prophet_df)

# Plot the results
predictions <- predict(model8, prophet_df)
p <- plot(model8, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model8, predictions)

# Check residuals
residuals_model_8 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_8, 36)

### Predicting new data --------------------------------------------------------
# Make future predictions, including regressors
prophet_df_test <- make_future_dataframe(
    model8, 
    periods = 182, # Number of samples to forecast
    freq = 'days', # Frequency of samples to forecast
    include_history = FALSE # indicate if previous dates should be included
    ) 
prophet_df_test$x1 <- test_data$input1
prophet_df_test$x2 <- test_data$input2

# Plot the results
predictions_test <- predict(model8, prophet_df_test)
p <- plot(model8, predictions_test) + 
    geom_point(data=test_data, aes(x = prophet_df_test$ds, y = output), 
               color = "red", alpha = 0.5)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model8, predictions_test)

residuals_test <- test_data$output - predictions_test$yhat
CheckResiduals.ICAI(residuals_test, 36)

# Evaluate the model
accuracy(predictions$yhat, train_data$output)
##                    ME    RMSE      MAE      MPE     MAPE
## Test set 1.551349e-05 1.52606 1.214273 133.7258 217.6792
accuracy(predictions_test$yhat, test_data$output)
##                 ME     RMSE      MAE      MPE     MAPE
## Test set 0.3384664 1.649087 1.344193 1.274717 8.551801
### Check more features at https://facebook.github.io/prophet/docs/quick_start.html